Replace the file name below with the newest COVID daily-report CSV to get the most up-to-date information.
# Read the daily-report snapshot; the first row of the CSV holds the column names.
covid_raw <- read.csv(file = "05-21-2020.csv", header = TRUE)
# Interactive alternative, kept for reference:
# covid_ts_confirmed <- read.csv(file.choose(), header = TRUE)
# Per-column overview of what was loaded.
summary(covid_raw)
## Province_State Country_Region Last_Update Lat
## Alabama : 1 US:58 2020-05-22 02:36:57:58 Min. :-14.27
## Alaska : 1 1st Qu.: 34.59
## American Samoa: 1 Median : 39.06
## Arizona : 1 Mean : 36.84
## Arkansas : 1 3rd Qu.: 42.36
## California : 1 Max. : 61.37
## (Other) :52 NA's :2
## Long_ Confirmed Deaths Recovered
## Min. :-170.13 Min. : 0 Min. : 0 Min. : 13
## 1st Qu.:-101.17 1st Qu.: 2608 1st Qu.: 74 1st Qu.: 999
## Median : -87.94 Median : 10404 Median : 398 Median : 3122
## Mean : -85.21 Mean : 27192 Mean : 1633 Mean : 7105
## 3rd Qu.: -76.97 3rd Qu.: 30109 3rd Qu.: 1422 3rd Qu.: 7327
## Max. : 145.67 Max. :356458 Max. :28743 Max. :62826
## NA's :2 NA's :16
## Active FIPS Incident_Rate People_Tested
## Min. : 0.0 Min. : 1.00 Min. : 0.0 Min. : 124
## 1st Qu.: 874.5 1st Qu.: 18.25 1st Qu.: 150.1 1st Qu.: 47444
## Median : 5850.5 Median : 32.50 Median : 254.6 Median : 142791
## Mean : 20414.3 Mean : 3288.09 Mean : 403.8 Mean : 233146
## 3rd Qu.: 19697.5 3rd Qu.: 47.75 3rd Qu.: 482.2 3rd Qu.: 292469
## Max. :264889.0 Max. :99999.00 Max. :1832.4 Max. :1555055
## NA's :2 NA's :2
## People_Hospitalized Mortality_Rate UID ISO3
## Min. : 65 Min. :0.000 Min. : 16 ASM: 1
## 1st Qu.: 563 1st Qu.:3.030 1st Qu.:84000012 GUM: 1
## Median : 1534 Median :4.394 Median :84000028 MNP: 1
## Mean : 4890 Mean :4.446 Mean :76761944 PRI: 1
## 3rd Qu.: 4289 3rd Qu.:5.461 3rd Qu.:84000042 USA:53
## Max. :76608 Max. :9.585 Max. :84099999 VIR: 1
## NA's :24 NA's :1
## Testing_Rate Hospitalization_Rate
## Min. : 99.3 Min. : 6.46
## 1st Qu.: 2775.4 1st Qu.: 9.86
## Median : 3574.1 Median :14.12
## Mean : 4025.5 Mean :14.29
## 3rd Qu.: 4981.7 3rd Qu.:17.47
## Max. :11645.4 Max. :27.92
## NA's :2 NA's :24
#covid_ts_confirmed
Initial look into plotting the data points (notes here)
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Scatterplot matrix of every pair of columns, as a first look at the raw data.
plot(covid_raw)

# Build a Deaths (x) vs Confirmed (y) scatter plot, one point per row,
# grouped by Province_State.
#   data  - data frame with Deaths, Confirmed and Province_State columns
#   title - plot title
# Returns the ggplot object.
# The grouping is mapped to `colour` rather than `fill`: geom_point's default
# shape has no fill (only shapes 21-25 do), so `fill` left the points
# uncoloured in the ggplot itself.
deaths_vs_confirmed_plot <- function(data, title) {
  ggplot(data, aes(x = Deaths, y = Confirmed, colour = Province_State)) +
    geom_point() +
    ggtitle(title)
}

# First ten rows of the file only.
byState10 <- covid_raw %>%
  head(10) %>%
  deaths_vs_confirmed_plot("Deaths vs Confirmed Cases in First Ten Provinces")
ggplotly(byState10)

# Every row of the file.
byState <- deaths_vs_confirmed_plot(
  covid_raw,
  "Deaths vs Confirmed Cases per Province"
)
ggplotly(byState)

# Same plot with the listed regions left out so the remaining points spread out.
excluded_regions <- c("New York", "New Jersey", "Hubei")
byStateNoNY <- covid_raw %>%
  dplyr::filter(!(Province_State %in% excluded_regions)) %>%
  deaths_vs_confirmed_plot("Deaths vs Confirmed Cases per Province")
ggplotly(byStateNoNY)

# Earlier draft (unused):
#covid_raw %>% head(10) %>% ggplot(aes(x=Deaths, y=Confirmed, fill=Province_State) + geom_point(aes(fill=Province_State)) + ggtitle("Deaths vs Confirmed Cases in Top Countries (Minus China)")

# Split the rows at 700 deaths. Both comparisons are strict, so a row with
# exactly 700 deaths appears in neither plot.
statesAbove700Deaths <- covid_raw %>%
  dplyr::filter(Deaths > 700) %>%
  deaths_vs_confirmed_plot("Over 700 Mortality States")
ggplotly(statesAbove700Deaths)

# Stored under its own name; previously this overwrote statesAbove700Deaths.
statesUnder700Deaths <- covid_raw %>%
  dplyr::filter(Deaths < 700) %>%
  deaths_vs_confirmed_plot("Under 700 Mortality States")
ggplotly(statesUnder700Deaths)
library(e1071)
library(usmap)
# Keep the US rows that carry a FIPS code.
# is.na() is the proper missing-value test; the previous comparison against
# the string 'NA' only dropped missing codes as a side effect of how filter()
# treats NA comparison results.
# Rows with placeholder codes (e.g. 88888 / 99999 for the cruise ships) are
# not missing and therefore stay in the result.
UScovid_dataset <- dplyr::filter(
  covid_raw,
  Country_Region == "US",
  !is.na(FIPS)
)
UScovid_dataset
## Province_State Country_Region Last_Update Lat
## 1 Alabama US 2020-05-22 02:36:57 32.3182
## 2 Alaska US 2020-05-22 02:36:57 61.3707
## 3 American Samoa US 2020-05-22 02:36:57 -14.2710
## 4 Arizona US 2020-05-22 02:36:57 33.7298
## 5 Arkansas US 2020-05-22 02:36:57 34.9697
## 6 California US 2020-05-22 02:36:57 36.1162
## 7 Colorado US 2020-05-22 02:36:57 39.0598
## 8 Connecticut US 2020-05-22 02:36:57 41.5978
## 9 Delaware US 2020-05-22 02:36:57 39.3185
## 10 Diamond Princess US 2020-05-22 02:36:57 NA
## 11 District of Columbia US 2020-05-22 02:36:57 38.8974
## 12 Florida US 2020-05-22 02:36:57 27.7663
## 13 Georgia US 2020-05-22 02:36:57 33.0406
## 14 Grand Princess US 2020-05-22 02:36:57 NA
## 15 Guam US 2020-05-22 02:36:57 13.4443
## 16 Hawaii US 2020-05-22 02:36:57 21.0943
## 17 Idaho US 2020-05-22 02:36:57 44.2405
## 18 Illinois US 2020-05-22 02:36:57 40.3495
## 19 Indiana US 2020-05-22 02:36:57 39.8494
## 20 Iowa US 2020-05-22 02:36:57 42.0115
## 21 Kansas US 2020-05-22 02:36:57 38.5266
## 22 Kentucky US 2020-05-22 02:36:57 37.6681
## 23 Louisiana US 2020-05-22 02:36:57 31.1695
## 24 Maine US 2020-05-22 02:36:57 44.6939
## 25 Maryland US 2020-05-22 02:36:57 39.0639
## 26 Massachusetts US 2020-05-22 02:36:57 42.2302
## 27 Michigan US 2020-05-22 02:36:57 43.3266
## 28 Minnesota US 2020-05-22 02:36:57 45.6945
## 29 Mississippi US 2020-05-22 02:36:57 32.7416
## 30 Missouri US 2020-05-22 02:36:57 38.4561
## 31 Montana US 2020-05-22 02:36:57 46.9219
## 32 Nebraska US 2020-05-22 02:36:57 41.1254
## 33 Nevada US 2020-05-22 02:36:57 38.3135
## 34 New Hampshire US 2020-05-22 02:36:57 43.4525
## 35 New Jersey US 2020-05-22 02:36:57 40.2989
## 36 New Mexico US 2020-05-22 02:36:57 34.8405
## 37 New York US 2020-05-22 02:36:57 42.1657
## 38 North Carolina US 2020-05-22 02:36:57 35.6301
## 39 North Dakota US 2020-05-22 02:36:57 47.5289
## 40 Northern Mariana Islands US 2020-05-22 02:36:57 15.0979
## 41 Ohio US 2020-05-22 02:36:57 40.3888
## 42 Oklahoma US 2020-05-22 02:36:57 35.5653
## 43 Oregon US 2020-05-22 02:36:57 44.5720
## 44 Pennsylvania US 2020-05-22 02:36:57 40.5908
## 45 Puerto Rico US 2020-05-22 02:36:57 18.2208
## 46 Rhode Island US 2020-05-22 02:36:57 41.6809
## 47 South Carolina US 2020-05-22 02:36:57 33.8569
## 48 South Dakota US 2020-05-22 02:36:57 44.2998
## 49 Tennessee US 2020-05-22 02:36:57 35.7478
## 50 Texas US 2020-05-22 02:36:57 31.0545
## 51 Utah US 2020-05-22 02:36:57 40.1500
## 52 Vermont US 2020-05-22 02:36:57 44.0459
## 53 Virgin Islands US 2020-05-22 02:36:57 18.3358
## 54 Virginia US 2020-05-22 02:36:57 37.7693
## 55 Washington US 2020-05-22 02:36:57 47.4009
## 56 West Virginia US 2020-05-22 02:36:57 38.4912
## 57 Wisconsin US 2020-05-22 02:36:57 44.2685
## 58 Wyoming US 2020-05-22 02:36:57 42.7560
## Long_ Confirmed Deaths Recovered Active FIPS Incident_Rate
## 1 -86.9023 13288 529 NA 12759 1 271.00752
## 2 -152.4044 401 10 356 35 2 54.81549
## 3 -170.1320 0 0 NA 0 60 0.00000
## 4 -111.4312 15348 764 3872 10712 4 210.86134
## 5 -92.3731 5458 110 3915 1433 5 180.85999
## 6 -119.6816 88031 3583 NA 84448 6 222.79435
## 7 -105.3111 23191 1310 3532 18349 8 402.70990
## 8 -72.7554 39208 3583 6264 29361 9 1099.71511
## 9 -75.5071 8386 317 4130 3939 10 861.19429
## 10 NA 49 0 NA 49 88888 NA
## 11 -77.0268 7788 412 1061 6315 11 1103.50847
## 12 -81.6868 48675 2144 NA 46531 12 226.63002
## 13 -83.6431 40663 1775 NA 38888 13 382.98371
## 14 NA 103 3 NA 100 99999 NA
## 15 144.7937 165 5 125 35 66 100.46947
## 16 -157.4983 647 17 578 52 15 45.69622
## 17 -114.4788 2506 77 1688 741 16 140.22993
## 18 -88.9861 102688 4607 NA 98081 17 810.36498
## 19 -86.2583 29936 1913 NA 28023 18 444.66765
## 20 -93.2105 16170 410 8672 7088 19 512.50844
## 21 -96.7265 8625 204 473 7948 20 296.05460
## 22 -84.6701 8286 386 2919 4981 21 185.46568
## 23 -91.8678 36504 2629 26249 7626 22 785.23591
## 24 -69.3819 1877 73 1145 659 23 139.63571
## 25 -76.8021 43531 2159 3099 38273 24 720.03480
## 26 -71.5301 90084 6148 NA 83936 25 1306.98529
## 27 -84.5361 53510 5129 28234 20147 26 535.80421
## 28 -93.9002 18200 818 12488 4894 27 322.71609
## 29 -89.6787 12222 580 7681 3961 28 410.66492
## 30 -92.2884 11689 668 NA 11021 29 190.45437
## 31 -110.4544 479 16 440 23 30 44.81754
## 32 -98.2681 11427 138 NA 11289 31 590.72336
## 33 -117.0554 7400 383 339 6678 32 240.24757
## 34 -71.5639 3935 199 1388 2348 33 289.39973
## 35 -74.5210 151586 10846 24236 116504 34 1706.62866
## 36 -106.2485 6472 294 1985 4193 35 308.65655
## 37 -74.9481 356458 28743 62826 264889 36 1832.35347
## 38 -79.8064 20512 728 11637 8147 37 195.57433
## 39 -99.7840 2229 51 1340 838 38 292.49589
## 40 145.6739 22 2 13 7 69 39.89555
## 41 -82.7649 30167 1837 NA 28330 39 258.07804
## 42 -96.9289 5680 304 4361 1015 40 143.54414
## 43 -122.0709 3817 145 1406 2266 41 90.49877
## 44 -77.2098 69252 4869 NA 64383 42 540.94719
## 45 -66.5901 2913 126 NA 2787 72 99.30429
## 46 -71.5118 13571 556 1047 11968 44 1281.05528
## 47 -80.9450 9381 416 5451 3514 45 182.20084
## 48 -99.4388 4177 48 3145 984 46 472.15933
## 49 -86.6923 18961 313 12191 6457 47 277.64705
## 50 -97.5635 53053 1460 30341 21252 48 182.96737
## 51 -111.8624 7874 92 4596 3186 49 245.60521
## 52 -72.7107 950 54 827 69 50 152.24627
## 53 -64.8963 69 6 61 2 78 64.32487
## 54 -78.1700 34137 1100 4778 28259 51 399.94053
## 55 -121.4905 19117 1044 NA 18073 53 251.04752
## 56 -80.9545 1593 70 983 540 54 88.88780
## 57 -89.6165 13885 487 8012 5386 55 238.47415
## 58 -107.3025 801 12 534 255 56 138.39958
## People_Tested People_Hospitalized Mortality_Rate UID ISO3 Testing_Rate
## 1 170739 1528 3.981036 84000001 USA 3482.20595
## 2 39545 NA 2.493766 84000002 USA 5405.68249
## 3 124 NA NA 16 ASM 222.85725
## 4 171627 1830 4.977847 84000004 USA 2357.92929
## 5 99276 535 2.015390 84000005 USA 3289.67686
## 6 1421127 NA 4.070157 84000006 USA 3596.67691
## 7 135611 3990 5.648743 84000008 USA 2354.87440
## 8 202747 10946 9.138441 84000009 USA 5686.69507
## 9 47542 NA 3.780110 84000010 USA 4882.29181
## 10 NA NA 0.000000 84088888 USA NA
## 11 41756 NA 5.290190 84000011 USA 5916.55107
## 12 813929 9200 4.404725 84000012 USA 3789.64041
## 13 407731 7235 4.365148 84000013 USA 3840.20680
## 14 NA NA 2.912621 84099999 USA NA
## 15 5064 NA 3.030303 316 GUM 3083.49926
## 16 47149 82 2.627512 84000015 USA 3330.03266
## 17 38888 221 3.072626 84000016 USA 2176.08201
## 18 672020 NA 4.486405 84000017 USA 5303.26304
## 19 202995 4389 6.390299 84000018 USA 3015.27624
## 20 116565 NA 2.535560 84000019 USA 3694.52976
## 21 71203 760 2.365217 84000020 USA 2444.05512
## 22 158672 2016 4.658460 84000021 USA 3551.55805
## 23 305381 NA 7.201950 84000022 USA 6569.03704
## 24 37327 235 3.889185 84000023 USA 2776.86853
## 25 220233 7485 4.959684 84000024 USA 3642.81603
## 26 501486 9040 6.824741 84000025 USA 7275.81838
## 27 454740 NA 9.585124 84000026 USA 4553.38451
## 28 173556 2380 4.494505 84000027 USA 3077.43484
## 29 125970 1932 4.745541 84000028 USA 4232.65099
## 30 162092 NA 5.714775 84000029 USA 2641.04117
## 31 30524 65 3.340292 84000030 USA 2855.97196
## 32 75640 NA 1.207666 84000031 USA 3910.24024
## 33 94382 NA 5.175676 84000032 USA 3064.19545
## 34 52830 385 5.057179 84000033 USA 3885.38447
## 35 544274 NA 7.155014 84000034 USA 6127.70049
## 36 147344 1139 4.542645 84000035 USA 7026.99171
## 37 1555055 76608 8.063503 84000036 USA 7993.67787
## 38 290645 NA 3.549142 84000037 USA 2771.19253
## 39 61279 144 2.288022 84000038 USA 8041.20925
## 40 4089 NA 9.090909 580 MNP 7415.13129
## 41 299078 5295 6.089435 84000039 USA 2558.60588
## 42 149595 917 5.352113 84000040 USA 3780.54325
## 43 105224 732 3.798795 84000041 USA 2494.79757
## 44 368906 NA 7.030844 84000042 USA 2881.63035
## 45 2913 NA 4.325438 630 PRI 99.30429
## 46 123367 1506 4.096971 84000044 USA 11645.41644
## 47 138238 1444 4.434495 84000045 USA 2684.90345
## 48 31301 342 1.149150 84000046 USA 3538.19946
## 49 360583 1539 1.650757 84000047 USA 5280.03826
## 50 770241 NA 2.751965 84000048 USA 2656.38075
## 51 182874 647 1.168402 84000049 USA 5704.19201
## 52 25701 NA 5.684211 84000050 USA 4118.82261
## 53 1383 NA 8.695652 850 VIR 1289.29410
## 54 218599 6269 3.222310 84000051 USA 2561.05106
## 55 297942 3125 5.461108 84000053 USA 3912.62228
## 56 83141 NA 4.394225 84000054 USA 4639.18417
## 57 177123 2218 3.507382 84000055 USA 3042.07828
## 58 18840 75 1.498127 84000056 USA 3255.24096
## Hospitalization_Rate
## 1 11.499097
## 2 NA
## 3 NA
## 4 11.923378
## 5 9.802125
## 6 NA
## 7 17.204950
## 8 27.917772
## 9 NA
## 10 NA
## 11 NA
## 12 18.900873
## 13 17.792588
## 14 NA
## 15 NA
## 16 12.673879
## 17 8.818835
## 18 NA
## 19 14.661277
## 20 NA
## 21 8.811594
## 22 24.330196
## 23 NA
## 24 12.519979
## 25 17.194643
## 26 10.035078
## 27 NA
## 28 13.076923
## 29 15.807560
## 30 NA
## 31 13.569937
## 32 NA
## 33 NA
## 34 9.783990
## 35 NA
## 36 17.598888
## 37 21.491452
## 38 NA
## 39 6.460296
## 40 NA
## 41 17.552292
## 42 16.144366
## 43 19.177364
## 44 NA
## 45 NA
## 46 11.097193
## 47 15.392815
## 48 8.187695
## 49 8.116661
## 50 NA
## 51 8.216916
## 52 NA
## 53 NA
## 54 18.364238
## 55 16.346707
## 56 NA
## 57 15.974073
## 58 9.363296
#UScovid_dataset$fips <- fips(brew_count_by_state$state)

# Sort the US rows by FIPS code. The column is referenced explicitly instead
# of through attach()/detach(), so an unrelated global named FIPS cannot
# shadow it and nothing is left on the search path if the script stops early.
UScovid_dataset_fips <- UScovid_dataset[order(UScovid_dataset$FIPS), ]

# plot_usmap() identifies regions through a lower-case `fips` (or `state`)
# column, so expose the FIPS codes under that name.
UScovid_dataset_fips$fips <- UScovid_dataset_fips$FIPS

# Draw a US state map shaded by the Deaths column.
#   data  - data frame with `fips` and `Deaths` columns
#   title - map title
# Returns the ggplot object produced by plot_usmap() plus labels and scale.
covid_deaths_map <- function(data, title) {
  map_colour <- rgb(.2, .7, 1)
  plot_usmap(data = data, values = "Deaths", color = map_colour) +
    labs(title = title, subtitle = "Count of Covid19 Deaths per state") +
    scale_fill_continuous(
      low = "white",
      high = map_colour,
      name = "Deaths per state",
      labels = scales::comma # spelled out; `label` relied on partial matching
    ) +
    theme(legend.position = "right")
}

# All states.
covid_deaths_map(UScovid_dataset_fips, "Covid Deaths by State")

# Without New York, whose count dominates the colour scale.
covid_deaths_map(
  dplyr::filter(UScovid_dataset_fips, Province_State != "New York"),
  "Covid Deaths by State (New York Removed)"
)

# Without New York and New Jersey; the title now names both removed states.
covid_deaths_map(
  dplyr::filter(
    UScovid_dataset_fips,
    Province_State != "New York" & Province_State != "New Jersey"
  ),
  "Covid Deaths by State (New York and New Jersey Removed)"
)
# Total confirmed cases per country, largest total first.
confirmed_by_country <- covid_raw %>%
  group_by(Country_Region) %>%
  tally(wt = Confirmed, sort = TRUE, name = "Confirmed")
confirmed_by_country
## # A tibble: 1 x 2
## Country_Region Confirmed
## <fct> <int>
## 1 US 1577147
# Total deaths per country, largest total first.
deaths_by_country <- covid_raw %>%
  group_by(Country_Region) %>%
  tally(wt = Deaths, sort = TRUE, name = "Deaths")
deaths_by_country
## # A tibble: 1 x 2
## Country_Region Deaths
## <fct> <int>
## 1 US 94702
# Combine the two per-country summaries into one table keyed on Country_Region.
totals <- merge(
  x = confirmed_by_country,
  y = deaths_by_country,
  by = "Country_Region"
)
totals
## Country_Region Confirmed Deaths
## 1 US 1577147 94702
Then reorder the totals by confirmed cases, highest first.
# Rows of `totals` arranged from the most to the fewest confirmed cases.
top_to_least <- totals[with(totals, order(Confirmed, decreasing = TRUE)), ]
top_to_least
## Country_Region Confirmed Deaths
## 1 US 1577147 94702
# Build a Deaths (x) vs Confirmed (y) scatter plot for the first ten rows of
# a per-country totals table, one point per country.
#   data  - data frame with Deaths, Confirmed and Country_Region columns
#   title - plot title
# Returns the ggplot object.
# Countries are mapped to `colour` rather than `fill`: geom_point's default
# shape has no fill (only shapes 21-25 do), so `fill` left the points
# uncoloured in the ggplot itself.
top_countries_plot <- function(data, title) {
  data %>%
    head(10) %>%
    ggplot(aes(x = Deaths, y = Confirmed, colour = Country_Region)) +
    geom_point() +
    ggtitle(title)
}

top10Confirmed <- top_countries_plot(
  top_to_least,
  "Deaths vs Confirmed Cases in Top countries"
)
ggplotly(top10Confirmed)

# At the time, China was the highest and I wanted to look at the rest, now it is much different
top10ConfirmedMinusChina <- top_countries_plot(
  subset(top_to_least, Country_Region != "China"),
  "Deaths vs Confirmed Cases in Top Countries (Minus China)"
)
ggplotly(top10ConfirmedMinusChina)

# Now removing US instead.
# NOTE: when the input file contains only US rows (as the totals printed for
# the 05-21-2020 file show), this subset has no rows left to plot.
top10ConfirmedMinusUS <- top_countries_plot(
  subset(top_to_least, Country_Region != "US"),
  "Deaths vs Confirmed Cases in Top Countries (Minus US)"
)
ggplotly(top10ConfirmedMinusUS)